import re

# Compiled once at import time so that per-line calls reuse the same pattern
# object instead of going through re.compile() on every invocation.
_TAG_PATTERN = re.compile(r'<\w*>|</\w*>')


def clean_text(text: str) -> "re.Match[str] | None":
    """Search *text* for the first markup-like tag.

    A tag is ``<`` or ``</`` followed by zero or more word characters and a
    closing ``>`` (for example ``<b>``, ``</p>`` or ``<>``). Despite the
    function's name, the text itself is not modified.

    Args:
        text: The string to scan.

    Returns:
        The match object for the first tag found, or ``None`` when *text*
        contains no such tag.
    """
    return _TAG_PATTERN.search(text)

def main(input_path="train.zh", output_path="train_zh_clean"):
    """Copy the lines of *input_path* that contain no tag to *output_path*.

    A line is kept only when ``clean_text(line)`` returns ``None``; every
    other line is dropped. Kept lines are written unchanged, including their
    line endings.

    Args:
        input_path: UTF-8 text file to read. Defaults to ``"train.zh"``.
        output_path: UTF-8 text file to create or overwrite. Defaults to
            ``"train_zh_clean"``.

    Raises:
        OSError: If either file cannot be opened.
    """
    # The input is opened first so that a missing input file raises before
    # the output file is created or truncated. Both handles are managed by
    # the same ``with`` so they are closed even if reading or writing fails.
    with open(input_path, "r", encoding="utf-8") as source, \
            open(output_path, "w", encoding="utf-8") as target:
        # Iterate the file lazily instead of loading every line into memory.
        target.writelines(
            line for line in source if clean_text(line) is None
        )

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()